TASKS/QUESTIONS:
# Install Packages
# install.packages(c("ggplot2", "ggforce", "forecast", "ggfortify", "ggpubr", "car", "dplyr","rstatix", "coin", "multcomp", "extrafont", "extrafontdb", "ggthemes"))
# library(ggplot2)
library(ggforce)
library(forecast)
library(ggfortify)
library(ggpubr)
library(car)
library(heatmaply)
library(scales)
library(rstatix)
library(ggpubr)
library(coin)
library(multcomp)
library(extrafont)
library(extrafontdb)
library(ggthemes)
library(rcartocolor)
# install.packages("Polychrome")
library(Polychrome)
library(tidyverse)
library(here)
# get data ready
# here::i_am(path = "./Code/SERDP_report_support/CompleteDatasets_rerun.R")
# import new files (summer 2024) -----
d_jba_wat_new<-read.csv(here("./Data/SERDP_report_support/410-165666_JBA_water_data_reformatted.csv"))
d_jba_sed_new<-read.csv(here("./Data/SERDP_report_support/410-173731_JBA_sediment_data_reformatted.csv"))
d_wg_wat_new<-read.csv(here("./Data/SERDP_report_support/410-169030_WG_water_data_reformatted.csv"))
# import old data (pre 2024) -----
d_jba_biota<-read.csv(here("./Data/Original_Brown_etal/JBA/JBA_Biota_Data.csv"))
d_jba_wat<-read.csv(here("./Data/Original_Brown_etal/JBA/JBA_Water_Data.csv"))
d_jba_sed<-read.csv(here("./Data/Original_Brown_etal/JBA/JBA_Sediment_Data.csv"))
d_wg_biota<-read.csv(here("./Data/Original_Brown_etal/WG/WG_Biota_Data.csv"))
d_wg_wat<-read.csv(here("./Data/Original_Brown_etal/WG/WG_Water_Data.csv"))
d_wg_sed<-read.csv(here("./Data/Original_Brown_etal/WG/WG_Sediment_Data.csv"))
# make data long and get the same colnames between two systems - water
# data needs: long format
# WG (sed, water, sed) -------
d_wg_wat_noSum <- d_wg_wat %>%
dplyr::rename(Spp_Loc = Location) %>%
dplyr::select(SampleID, Spp_Loc, SampleDate, Precipitation,
PFBA, PFPeA, PFBS, PFHxA, PFPeS,
PFHpA, PFHxS, PFOA, SixTwoFTS,
PFHpS, PFNA, PFOS) %>% # 12 PFAS
pivot_longer(cols = c(PFBA, PFPeA, PFBS, PFHxA, PFPeS,
PFHpA, PFHxS,PFOA, SixTwoFTS, PFHpS, PFNA, PFOS),
names_to = "Analyte") %>%
mutate(Media = "Surface Water",
SampleDate = strptime(SampleDate, format = "%d-%b-%y"))
d_wg_sed_noSum <- d_wg_sed %>%
dplyr::rename(Spp_Loc = Location) %>%
dplyr::select(SampleID, Spp_Loc, SampleDate, PFBA, PFPeA, PFBS, PFHxA, PFPeS,
PFHpA, PFHxS,PFOA, SixTwoFTS, PFHpS, PFNA, PFOS) %>% # 12 PFAS
pivot_longer(cols = c(PFBA, PFPeA, PFBS, PFHxA, PFPeS,
PFHpA, PFHxS,PFOA, SixTwoFTS, PFHpS, PFNA, PFOS),
names_to = "Analyte") %>%
mutate(Media = "Sediment",
SampleDate = strptime(SampleDate, format = "%d-%b-%y"))
d_wg_sed_noSum<-tibble::add_column(d_wg_sed_noSum, Precipitation = NA, .before = "Analyte")
d_wg_biota_noSum <- d_wg_biota %>%
dplyr::rename(Spp_Loc = Species) %>%
dplyr::select(SampleID, Spp_Loc, SampleDate, PFBA, PFPeA, PFBS, PFHxA, PFPeS,
PFHpA, PFHxS,PFOA, SixTwoFTS, PFHpS, PFNA, PFOS) %>% # 12 PFAS
pivot_longer(cols = c(PFBA, PFPeA, PFBS, PFHxA, PFPeS,
PFHpA, PFHxS,PFOA, SixTwoFTS, PFHpS, PFNA, PFOS),
names_to = "Analyte") %>%
mutate(Media = "Biota",
SampleDate = strptime(SampleDate, format = "%d-%b-%y"))
d_wg_biota_noSum<-tibble::add_column(d_wg_biota_noSum, Precipitation = NA, .before = "Analyte")
# JBA (sed, wat, biota) -------
d_jba_wat_noSum <- d_jba_wat %>%
dplyr::rename(Spp_Loc = Location) %>%
dplyr::select(Spp_Loc, SampleDate, Precipitation,
PFBA, PFPrS, ThreeThreeFTCA ,PFPeA ,PFBS,
FourTwoFTS, PFHxA, PFPeS, FiveThreeFTCA, PFHpA,
ADONA, PFHxS, SixTwoFTS, PFOA, PFHpS,
SevenThreeFTCA, PFNA, PFOSA, PFOS, PFDA,
EightTwoFTS , PFNS , MeFOSAA ,EtFOSAA, PFDS,
TenTwoFTS, PFDoA , MeFOSA , PFTrDA, PFDoS,
PFTeDA, PFHxDA) %>% # 32 PFAS
pivot_longer(cols = c(PFBA, PFPrS, ThreeThreeFTCA ,PFPeA ,PFBS,
FourTwoFTS, PFHxA, PFPeS, FiveThreeFTCA, PFHpA,
ADONA, PFHxS, SixTwoFTS, PFOA, PFHpS,
SevenThreeFTCA, PFNA, PFOSA, PFOS, PFDA,
EightTwoFTS , PFNS , MeFOSAA ,EtFOSAA, PFDS,
TenTwoFTS, PFDoA , MeFOSA , PFTrDA, PFDoS,
PFTeDA, PFHxDA),
names_to = "Analyte") %>%
mutate(Media = "Surface Water",
SampleDate = strptime(SampleDate, format = "%d-%b-%y"))
d_jba_sed_noSum <- d_jba_sed %>%
dplyr::rename(Spp_Loc = Location,
SamepleID = Sample.ID) %>%
dplyr::select(Spp_Loc, SampleDate,
PFBA , PFPeA , PFHxA , PFHpA , PFOA ,
PFNA , PFDA ,PFUnA , PFDoA, PFTrDA,
PFTeDA, PFBS,PFPeS , PFHxS ,PFHpS,
PFOS , PFNS,PFDS,PFDoS , FourTwoFTS,
SixTwoFTS,EightTwoFTS, PFOSA , MeFOSA, MeFOSAA,
EtFOSAA, ADONA, ThreeThreeFTCA,
FiveThreeFTCA, SevenThreeFTCA) %>% # 30 PFAS
pivot_longer(cols = c(PFBA , PFPeA , PFHxA , PFHpA , PFOA ,
PFNA , PFDA ,PFUnA , PFDoA, PFTrDA,
PFTeDA, PFBS,PFPeS , PFHxS ,PFHpS,
PFOS , PFNS,PFDS,PFDoS , FourTwoFTS,
SixTwoFTS,EightTwoFTS, PFOSA , MeFOSA, MeFOSAA,
EtFOSAA, ADONA, ThreeThreeFTCA, FiveThreeFTCA, SevenThreeFTCA),
names_to = "Analyte") %>%
mutate(Media = "Sediment",
SampleDate = strptime(SampleDate, format = "%d-%b-%y"))
d_jba_sed_noSum<-tibble::add_column(d_jba_sed_noSum, Precipitation = NA, .before = "Analyte")
# d_jba_biota_noSum <- d_jba_biota %>%
# dplyr::rename(Spp_Loc = Species) %>%
# dplyr::select(SampleID, Spp_Loc, SampleDate,
# ) %>% # 12 PFAS
# pivot_longer(cols = c(PFBA, PFPeA, PFBS, PFHxA, PFPeS,
# PFHpA, PFHxS,PFOA, `6:2 FTS`, PFHpS, PFNA, PFOS),
# names_to = "Analyte") %>%
# mutate(Media = "Biota",
# SampleDate = strptime(SampleDate, format = "%d-%b-%y"))
# function to wrangle the new data ------
# data<-d_jba_sed_new
# location<-"JBA"
# media<-"Sediment"
new_data_prep<-function(location, data, media){
d_new<-data
if(location == "WG"){
d_new$Location<-NA
d_new[which(grepl(pattern = "WEST", d_new$Sample_ID)),"Location"]<-"North"
d_new[which(grepl(pattern = "EAST", d_new$Sample_ID)),"Location"]<-"South"
d_new[which(grepl(pattern = "HEAD", d_new$Sample_ID)),"Location"]<-"Head"
d_new[which(grepl(pattern = "SPILL", d_new$Sample_ID)),"Location"]<-"Spill"
d_new[which(grepl(pattern = "ROAD", d_new$Sample_ID)),"Location"]<-"Road"
d_new[which(grepl(pattern = "FB", d_new$Sample_ID)),"Location"]<-"FB"
d_new[which(grepl(pattern = "EICE", d_new$Sample_ID)),"Location"]<-"EICE"
}
if(location == "JBA"){
d_new$Location<-NA
d_new[which(grepl(pattern = "RIPP", d_new$Sample_ID)),"Location"]<-"Ripp"
d_new[which(grepl(pattern = "SPILL", d_new$Sample_ID)),"Location"]<-"Spill"
d_new[which(grepl(pattern = "EXIT", d_new$Sample_ID)),"Location"]<-"Exit"
d_new[which(grepl(pattern = "DOWN", d_new$Sample_ID)),"Location"]<-"Down"
}
# get the PFAS abbreviation
d_new$Analyte<-sub('.*\\(', '', d_new$Analyte)
d_new$Analyte<-sub('\\)', '', d_new$Analyte)
# summarize and keep long data format
d_new <- d_new %>%
dplyr::select(Sample_ID, Location, Sampling_Date, Analyte, Result.Value) %>%
mutate(Result.Value = as.numeric(Result.Value)) %>%
dplyr::rename(SampleID = Sample_ID,
Spp_Loc = Location,
SampleDate = Sampling_Date,
value = Result.Value) %>%
mutate(Media = media,
SampleDate = strptime(SampleDate, format = "%m/%d/%Y %H:%M:%S"))
return(d_new)
}
d_jba_wat_2024<-new_data_prep(data = d_jba_wat_new,
location = "JBA",
media = "Surface Water")
d_wg_wat_2024<-new_data_prep(data = d_wg_wat_new,
location = "WG",
media = "Surface Water")
d_wg_wat_2024$Analyte<-factor(d_wg_wat_2024$Analyte)
levels(d_wg_wat_2024$Analyte)[levels(d_wg_wat_2024$Analyte)=="4:2 FTS"] <- "FourTwoFTS"
levels(d_wg_wat_2024$Analyte)[levels(d_wg_wat_2024$Analyte)=="6:2 FTS"] <- "SixTwoFTS"
levels(d_wg_wat_2024$Analyte)[levels(d_wg_wat_2024$Analyte)=="8:2 FTS"] <- "EightTwoFTS"
levels(d_wg_wat_2024$Analyte)[levels(d_wg_wat_2024$Analyte)=="7:3 FTCA"] <- "SevenThreeFTCA"
levels(d_wg_wat_2024$Analyte)[levels(d_wg_wat_2024$Analyte)=="5:3 FTCA"] <- "FiveThreeFTCA"
levels(d_wg_wat_2024$Analyte)[levels(d_wg_wat_2024$Analyte)=="3:3 FTCA"] <- "ThreeThreeFTCA"
d_wg_wat_2024<-tibble::add_column(d_wg_wat_2024, Precipitation = NA, .before = "Analyte")
# scales::show_col(carto_pal(nColor, "Safe"))
# create your own color palette (32 colors) based on `seedcolors`
P50 = createPalette(50, c("#ff0000", "#00ff00", "#0000ff"))
all_wg_data<-rbind(d_wg_wat_noSum, d_wg_sed_noSum, d_wg_biota_noSum, d_wg_wat_2024)
names(P50)<-levels(all_wg_data$Analyte)
# apply the function to get the data
Abbi: This plot should be a bar plot of biota, water, and sediment PFAS percent compositions (Figure 2 in paper)
TASKS/QUESTIONS:
fig2_all<-ggplot(rbind(d_wg_wat_noSum,
d_wg_sed_noSum,
d_wg_biota_noSum,
d_wg_wat_2024),
aes(x=Media, y=value, fill=Analyte)) +
geom_bar(position='fill', width = 0.5, stat='identity') +
xlab("") +
scale_fill_manual(values = P50)+
ylab("PFAS Percent Composition") +
theme_bw()
fig2_new<-ggplot(rbind(d_wg_wat_2024),
aes(x=Media, y=value, fill=Analyte)) +
geom_bar(position='fill', width = 0.5, stat='identity') +
xlab("") +
ylab("PFAS Percent Composition") +
theme_bw()
fig2_all
# fig2_new
Abbi: This plot should be the line chart with the 4 sampling locations and sum PFAS surface water concentrations over time (Figure 3 in the paper). For this one I inserted the second y axis and scaled it to the data (Not sure if you know what I mean, but if you can start with recreating the line plot I can add the second y axis because I can’t find that code right now so i’ll have to re-do it, but here’s code to start!)
TASKS/QUESTIONS:
# get data first summarise sum PFAS by date
p.full<-rbind(d_wg_wat_noSum, d_wg_wat_2024) %>%
mutate(SampleID = factor(SampleID)) %>%
dplyr::group_by(SampleID, Spp_Loc, Media, SampleDate) %>%
dplyr::summarise(SumPFAS = sum(value, na.rm = T)) %>%
ggplot(aes(x=as.Date(SampleDate), y=SumPFAS, group=Spp_Loc, color=Spp_Loc)) +
annotate("rect", xmin = as.Date("2020-10-1"), xmax = as.Date("2020-11-20"),
ymin = -Inf, ymax = Inf,
alpha = .1, fill = "black")+
annotate("rect", xmin = as.Date("2021-01-30"), xmax = as.Date("2021-04-01"),
ymin = -Inf, ymax = Inf,
alpha = .1, fill = "black")+
annotate("rect", xmin = as.Date("2021-06-01"), xmax = as.Date("2021-07-30"),
ymin = -Inf, ymax = Inf,
alpha = .1, fill = "black")+# geom_line(alpha = 0.3)+
geom_point(size=0.5)+
ylab("Sum PFAS Concentration (ng/L)") +
xlab("")+
scale_y_continuous(breaks=c(500,1000,1500,2000,2500, 3000, 3500)) +
scale_x_date(date_labels = "%b %d", breaks = "2 months")+
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust=1),
panel.grid.minor = element_blank())
p.A<-rbind(d_wg_wat_2024) %>%
mutate(SampleID = factor(SampleID)) %>%
dplyr::group_by(SampleID, Spp_Loc, Media, SampleDate) %>%
dplyr::summarise(SumPFAS = sum(value, na.rm = T)) %>%
filter(SampleDate > as.Date("2021-01-30") & SampleDate < as.Date("2021-04-01")) %>% ggplot(aes(x=as.Date(SampleDate), y=SumPFAS, group=Spp_Loc, color=Spp_Loc)) +
geom_line()+
geom_point(size=0.5)+
ylab("Sum PFAS Concentration (ng/L)") +
xlab("")+
scale_y_continuous(breaks=c(500,1000,1500,2000,2500, 3000, 3500)) +
scale_x_date(date_labels = "%b %d", breaks = "1 week")+
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust=1),
panel.grid.minor = element_blank(),
legend.position = "none")
p.B<-rbind(d_wg_wat_2024) %>%
mutate(SampleID = factor(SampleID)) %>%
dplyr::group_by(SampleID, Spp_Loc, Media, SampleDate) %>%
dplyr::summarise(SumPFAS = sum(value, na.rm = T)) %>%
filter(SampleDate > as.Date("2021-06-01") & SampleDate < as.Date("2021-07-30")) %>%
ggplot(aes(x=as.Date(SampleDate), y=SumPFAS, group=Spp_Loc, color=Spp_Loc)) +
geom_line()+
geom_point(size=0.5)+
ylab("Sum PFAS Concentration (ng/L)") +
xlab("")+
scale_y_continuous(breaks=c(500,1000,1500,2000,2500, 3000, 3500)) +
scale_x_date(date_labels = "%b %d", breaks = "1 week")+
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust=1),
panel.grid.minor = element_blank(),
legend.position = "none")
p.C<-rbind(d_wg_wat_noSum) %>%
mutate(SampleID = factor(SampleID)) %>%
dplyr::group_by(SampleID, Spp_Loc, Media, SampleDate) %>%
dplyr::summarise(SumPFAS = sum(value, na.rm = T)) %>%
ggplot(aes(x=as.Date(SampleDate), y=SumPFAS, group=Spp_Loc, color=Spp_Loc)) +
geom_line()+
geom_point(size=0.5)+
ylab("Sum PFAS Concentration (ng/L)") +
xlab("")+
scale_y_continuous(breaks=c(500,1000,1500,2000,2500, 3000, 3500)) +
scale_x_date(date_labels = "%b %d", breaks = "1 week")+
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust=1),
panel.grid.minor = element_blank(),
legend.position = "none")
row2<-cowplot::plot_grid(p.C, p.A, p.B,
labels = c("B", "C", "D"),
nrow = 1)
cowplot::plot_grid(p.full, row2,
labels = c("A", ""),
nrow = 2)
Abbi: This is for the heatmaps (Figure 4 in the paper). At the time I just added the annotations in the PDF after creating the file, but now I’d do them in R since i’m better at that now haha
TASKS/QUESTIONS:
df2<-rbind(d_wg_wat_noSum, d_wg_wat_2024) %>%
dplyr::group_by(SampleDate, Analyte) %>%
dplyr::summarise(meanPFAS = round(mean(value, na.rm = T), 1)) %>%
mutate(meanPFAS = as.numeric(meanPFAS)) %>%
pivot_wider(values_from = "meanPFAS", names_from = "Analyte") %>%
as.data.frame()
rownames(df2)<-df2$SampleDate
df2<-as.matrix(df2[,-1])
heatmaply(df2,
scale_fill_gradient_fun = ggplot2::scale_fill_gradientn(
colours = c("darkgreen","lightgreen","yellow","orange","red","darkred")),
dendrogram = "none",
xlab = "", ylab = "",
main = "",
scale = "column",
margins = c(40,80,10,10),
grid_color = "white",
grid_width = 0.00001,
titleX = FALSE,
hide_colorbar = FALSE,
cellnote = df2,
cellnote_textposition = "middle center",
cellnote_size = 6,
branches_lwd = 0.1,
fontsize_row = 10,
fontsize_col = 10,
labCol = colnames(df2),
labRow = rownames(df2),
heatmap_layers = theme(axis.line=element_blank(),
# axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_text(size = 12,
color="black",
family = "Times New Roman"))
# file = c("./Heatmap.pdf",
# height=5, width=4, res=2000),
)
Abbi: This plot should be box plots of sediment PFHxS, PFOS, and PFOA concentrations at each of the sampling locations (Figure 5 in paper).
TASKS/QUESTIONS:
# no new sediment
phhxs_wg_sed<-rbind(d_wg_sed_noSum) %>%
filter(Analyte == "PFHxS") %>%
mutate(SampleID = factor(SampleID)) %>%
ggplot(aes(Spp_Loc, value))+
xlab("") +
ylab("PFAS Concentration (ng/g)") +
ggtitle("PFHxS in sediment") +
stat_boxplot(aes(Spp_Loc, value),
geom='errorbar', linetype=1, width=0.25)+
geom_boxplot(aes(Spp_Loc, value),
outlier.shape=3,
outlier.color = "black",
outlier.size = 2.5,
color = "black") +
stat_summary(fun=mean, geom="point", size=1.5, color = "red") +
theme_bw() +
theme(plot.title = element_text(hjust = (0.5), size = 10))
# make water plots?
pfhxs_wg_all<-rbind(d_wg_wat_noSum, d_wg_wat_2024) %>%
filter(Analyte == "PFHxS") %>%
mutate(SampleID = factor(SampleID)) %>%
ggplot(aes(Spp_Loc, value))+
xlab("") +
ylab("PFAS Concentration (ng/g)") +
ggtitle("PFHxS in water") +
stat_boxplot(aes(Spp_Loc, value),
geom='errorbar', linetype=1, width=0.25)+
geom_boxplot(aes(Spp_Loc, value),
outlier.shape=3,
outlier.color = "black",
outlier.size = 2.5,
color = "black") +
stat_summary(fun=mean, geom="point", size=1.5, color = "red") +
theme_bw() +
theme(plot.title = element_text(hjust = (0.5), size = 10))
phos_wg_all<-rbind(d_wg_wat_noSum, d_wg_wat_2024) %>%
filter(Analyte == "PFOS") %>%
mutate(SampleID = factor(SampleID)) %>%
ggplot(aes(Spp_Loc, value))+
xlab("") +
ylab("PFAS Concentration (ng/g)") +
ggtitle("PFOS in water") +
stat_boxplot(aes(Spp_Loc, value),
geom='errorbar', linetype=1, width=0.25)+
geom_boxplot(aes(Spp_Loc, value),
outlier.shape=3,
outlier.color = "black",
outlier.size = 2.5,
color = "black") +
stat_summary(fun=mean, geom="point", size=1.5, color = "red") +
theme_bw() +
theme(plot.title = element_text(hjust = (0.5), size = 10))
phoa_wg_all<-rbind(d_wg_wat_noSum, d_wg_wat_2024) %>%
filter(Analyte == "PFOA") %>%
mutate(SampleID = factor(SampleID)) %>%
ggplot(aes(Spp_Loc, value))+
xlab("") +
ylab("PFAS Concentration (ng/g)") +
ggtitle("PFOA in water") +
stat_boxplot(aes(Spp_Loc, value),
geom='errorbar', linetype=1, width=0.25)+
geom_boxplot(aes(Spp_Loc, value),
outlier.shape=3,
outlier.color = "black",
outlier.size = 2.5,
color = "black") +
stat_summary(fun=mean, geom="point", size=1.5, color = "red") +
theme_bw() +
theme(plot.title = element_text(hjust = (0.5), size = 10))
cowplot::plot_grid(pfhxs_wg_all, phoa_wg_all, phoa_wg_all,
nrow = 1)